library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ stringr 1.4.0
## ✓ tidyr   1.0.2     ✓ forcats 0.5.0
## ✓ readr   1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(viridis)
## Loading required package: viridisLite
# Download the CSSE COVID-19 daily report dated 03-31-2020 from GitHub.
report_3_31_2020 <- read_csv(
  url(paste0(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
    "csse_covid_19_data/csse_covid_19_daily_reports/03-31-2020.csv"
  ))
)
## Parsed with column specification:
## cols(
##   FIPS = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
# Preview the first rows of the daily report.
report_3_31_2020 %>% head()
## # A tibble: 6 x 12
##   FIPS  Admin2 Province_State Country_Region Last_Update           Lat  Long_
##   <chr> <chr>  <chr>          <chr>          <dttm>              <dbl>  <dbl>
## 1 45001 Abbev… South Carolina US             2020-03-31 23:43:56  34.2  -82.5
## 2 22001 Acadia Louisiana      US             2020-03-31 23:43:56  30.3  -92.4
## 3 51001 Accom… Virginia       US             2020-03-31 23:43:56  37.8  -75.6
## 4 16001 Ada    Idaho          US             2020-03-31 23:43:56  43.5 -116. 
## 5 19001 Adair  Iowa           US             2020-03-31 23:43:56  41.3  -94.5
## 6 29001 Adair  Missouri       US             2020-03-31 23:43:56  40.2  -92.6
## # … with 5 more variables: Confirmed <dbl>, Deaths <dbl>, Recovered <dbl>,
## #   Active <dbl>, Combined_Key <chr>
# Show the column names and types of the daily report.
report_3_31_2020 %>% str()
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 2434 obs. of  12 variables:
##  $ FIPS          : chr  "45001" "22001" "51001" "16001" ...
##  $ Admin2        : chr  "Abbeville" "Acadia" "Accomack" "Ada" ...
##  $ Province_State: chr  "South Carolina" "Louisiana" "Virginia" "Idaho" ...
##  $ Country_Region: chr  "US" "US" "US" "US" ...
##  $ Last_Update   : POSIXct, format: "2020-03-31 23:43:56" "2020-03-31 23:43:56" ...
##  $ Lat           : num  34.2 30.3 37.8 43.5 41.3 ...
##  $ Long_         : num  -82.5 -92.4 -75.6 -116.2 -94.5 ...
##  $ Confirmed     : num  4 39 7 195 1 2 4 181 2 1 ...
##  $ Deaths        : num  0 1 0 3 0 0 0 2 0 0 ...
##  $ Recovered     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Active        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Combined_Key  : chr  "Abbeville, South Carolina, US" "Acadia, Louisiana, US" "Accomack, Virginia, US" "Ada, Idaho, US" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   FIPS = col_character(),
##   ..   Admin2 = col_character(),
##   ..   Province_State = col_character(),
##   ..   Country_Region = col_character(),
##   ..   Last_Update = col_datetime(format = ""),
##   ..   Lat = col_double(),
##   ..   Long_ = col_double(),
##   ..   Confirmed = col_double(),
##   ..   Deaths = col_double(),
##   ..   Recovered = col_double(),
##   ..   Active = col_double(),
##   ..   Combined_Key = col_character()
##   .. )

Total number of cases for each US state.

# Dot plot of confirmed cases per US state/territory in the 3-31-20 daily
# report, ordered by case count.
fig1 <- report_3_31_2020 %>%
  filter(Country_Region == "US") %>%
  group_by(Province_State) %>%
  # Confirmed is treated as the cumulative case count; Deaths and Recovered are
  # subsets of it, so adding them on top would count those cases twice.
  summarise(Total = sum(Confirmed, na.rm = TRUE)) %>%
  ggplot(aes(x = Total, y = reorder(Province_State, Total))) +
    geom_point() +
    ggtitle("Total number of Cases for each US State as of 3-31-20") +
    # The y axis shows Province_State values, not countries.
    ylab("Province/State") +
    xlab("Cases")

# Render the static ggplot as an interactive plotly widget.
ggplotly(fig1)

The 10 Nations with the Fewest Confirmed Cases

# Horizontal bar chart of the ten countries/regions with the lowest
# confirmed-case totals in the 3-31-20 daily report.
fig2 <- report_3_31_2020 %>%
  group_by(Country_Region) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  # Ascending sort puts the smallest totals first, so rows 1-10 are the fewest.
  arrange(Confirmed) %>%
  dplyr::slice(1:10) %>%
  ggplot(aes(y = Confirmed, x = reorder(Country_Region, Confirmed))) +
    # geom_col() draws bars whose heights are the data values themselves.
    geom_col() +
    ggtitle("10 Nations with the Fewest Confirmed Cases as of 3-31-20") +
    ylab("Confirmed Cases") +
    xlab("Country/Region") +
    # Flip so the country names read horizontally along the vertical axis.
    coord_flip()

# Render the static ggplot as an interactive plotly widget.
ggplotly(fig2)
# Download the US confirmed-cases time series (wide layout: the date columns
# are reshaped to long form further below).
US_Time_Series_Confirmed <- read_csv(
  url(paste0(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
    "csse_covid_19_data/csse_covid_19_time_series/",
    "time_series_covid19_confirmed_US.csv"
  ))
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   iso2 = col_character(),
##   iso3 = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Combined_Key = col_character()
## )
## See spec(...) for full column specifications.
# Reshape to long format: every column other than the identifier columns
# listed here becomes one (Date, Confirmed) row.
US_Time_Series_Confirmed_Long <- pivot_longer(
  US_Time_Series_Confirmed,
  cols = -c(
    UID, iso2, iso3, code3, FIPS, Admin2, Province_State, Country_Region,
    Lat, Long_, Combined_Key
  ),
  names_to = "Date",
  values_to = "Confirmed"
)

# Download the global confirmed-cases time series and rename the two
# slash-containing columns to the underscore names used for the US tables.
Global_Time_Series_Confirmed <- dplyr::rename(
  read_csv(
    url(paste0(
      "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
      "csse_covid_19_data/csse_covid_19_time_series/",
      "time_series_covid19_confirmed_global.csv"
    ))
  ),
  Province_State = `Province/State`,
  Country_Region = `Country/Region`
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Reshape to long format: every column other than the four identifier columns
# becomes one (Date, Confirmed) row.
Global_Time_Series_Confirmed_Long <- pivot_longer(
  Global_Time_Series_Confirmed,
  cols = -c(Province_State, Country_Region, Lat, Long),
  names_to = "Date",
  values_to = "Confirmed"
)

# Download the US deaths time series and drop its Population column so the
# remaining identifier columns match the ones excluded when reshaping.
US_Time_Series_Deaths <- select(
  read_csv(
    url(paste0(
      "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
      "csse_covid_19_data/csse_covid_19_time_series/",
      "time_series_covid19_deaths_US.csv"
    ))
  ),
  -Population
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   iso2 = col_character(),
##   iso3 = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Combined_Key = col_character()
## )
## See spec(...) for full column specifications.
# Reshape to long format: every column other than the identifier columns
# listed here becomes one (Date, Deaths) row.
US_Time_Series_Deaths_Long <- pivot_longer(
  US_Time_Series_Deaths,
  cols = -c(
    UID, iso2, iso3, code3, FIPS, Admin2, Province_State, Country_Region,
    Lat, Long_, Combined_Key
  ),
  names_to = "Date",
  values_to = "Deaths"
)

# Download the global deaths time series and rename the two slash-containing
# columns to underscore names.
Global_Time_Series_Deaths <- dplyr::rename(
  read_csv(
    url(paste0(
      "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
      "csse_covid_19_data/csse_covid_19_time_series/",
      "time_series_covid19_deaths_global.csv"
    ))
  ),
  Province_State = `Province/State`,
  Country_Region = `Country/Region`
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Reshape to long format: every column other than the four identifier columns
# becomes one (Date, Deaths) row.
Global_Time_Series_Deaths_Long <- pivot_longer(
  Global_Time_Series_Deaths,
  cols = -c(Province_State, Country_Region, Lat, Long),
  names_to = "Date",
  values_to = "Deaths"
)

# Download the global recovered time series and rename the two
# slash-containing columns to underscore names.
Global_Time_Series_Recovered <- dplyr::rename(
  read_csv(
    url(paste0(
      "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
      "csse_covid_19_data/csse_covid_19_time_series/",
      "time_series_covid19_recovered_global.csv"
    ))
  ),
  Province_State = `Province/State`,
  Country_Region = `Country/Region`
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Reshape to long format: every column other than the four identifier columns
# becomes one (Date, Recovered) row.
Global_Time_Series_Recovered_Long <- pivot_longer(
  Global_Time_Series_Recovered,
  cols = -c(Province_State, Country_Region, Lat, Long),
  names_to = "Date",
  values_to = "Recovered"
)
# Add a composite Key ("<Combined_Key>.<Date>") to both long tables so that
# confirmed and death counts can be matched row for row. remove = FALSE keeps
# the source columns alongside the new Key.
US_Time_Series_Confirmed_Long <- unite(
  US_Time_Series_Confirmed_Long,
  Key, Combined_Key, Date,
  sep = ".", remove = FALSE
)

US_Time_Series_Deaths_Long <- unite(
  US_Time_Series_Deaths_Long,
  Key, Combined_Key, Date,
  sep = ".", remove = FALSE
)

# Keep only the join key and the value from the deaths table; the identifier
# columns are taken from the confirmed table.
US_Time_Series_Deaths_Long2 <- select(US_Time_Series_Deaths_Long, Key, Deaths)

# Combine both measures, then drop the helper key.
US_Time_Series_Concat <- US_Time_Series_Confirmed_Long %>%
  full_join(US_Time_Series_Deaths_Long2, by = "Key") %>%
  select(-Key)

US_Time_Series_Concat
## # A tibble: 358,710 x 14
##      UID iso2  iso3  code3  FIPS Admin2 Province_State Country_Region   Lat
##    <dbl> <chr> <chr> <dbl> <dbl> <chr>  <chr>          <chr>          <dbl>
##  1    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  2    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  3    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  4    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  5    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  6    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  7    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  8    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  9    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
## 10    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
## # … with 358,700 more rows, and 5 more variables: Long_ <dbl>,
## #   Combined_Key <chr>, Date <chr>, Confirmed <dbl>, Deaths <dbl>
# Convert the month/day/year strings in Date into Date values.
US_Time_Series_Concat <- US_Time_Series_Concat %>%
  mutate(Date = mdy(Date))

US_Time_Series_Concat
## # A tibble: 358,710 x 14
##      UID iso2  iso3  code3  FIPS Admin2 Province_State Country_Region   Lat
##    <dbl> <chr> <chr> <dbl> <dbl> <chr>  <chr>          <chr>          <dbl>
##  1    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  2    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  3    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  4    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  5    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  6    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  7    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  8    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  9    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
## 10    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
## # … with 358,700 more rows, and 5 more variables: Long_ <dbl>,
## #   Combined_Key <chr>, Date <date>, Confirmed <dbl>, Deaths <dbl>
# Stack the remaining measure columns (Confirmed, Deaths) into a single
# Report_Type/Counts pair, giving one row per location, date and measure.
US_Time_Series_Concat_Counts <- pivot_longer(
  US_Time_Series_Concat,
  cols = -c(
    UID, iso2, iso3, code3, FIPS, Admin2, Province_State, Country_Region,
    Lat, Long_, Date, Combined_Key
  ),
  names_to = "Report_Type",
  values_to = "Counts"
)

US_Time_Series_Concat_Counts
## # A tibble: 717,420 x 14
##      UID iso2  iso3  code3  FIPS Admin2 Province_State Country_Region   Lat
##    <dbl> <chr> <chr> <dbl> <dbl> <chr>  <chr>          <chr>          <dbl>
##  1    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  2    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  3    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  4    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  5    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  6    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  7    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  8    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
##  9    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
## 10    16 AS    ASM      16    60 <NA>   American Samoa US             -14.3
## # … with 717,410 more rows, and 5 more variables: Long_ <dbl>,
## #   Combined_Key <chr>, Date <date>, Report_Type <chr>, Counts <dbl>
# Add a composite Key ("<Province_State>.<Country_Region>.<Date>") to each
# global long table. remove = FALSE keeps the source columns next to the Key.
Global_Time_Series_Confirmed_Long <- Global_Time_Series_Confirmed_Long %>%
  unite(Key, Province_State, Country_Region, Date, sep = ".", remove = FALSE)

Global_Time_Series_Deaths_Long <- Global_Time_Series_Deaths_Long %>%
  unite(Key, Province_State, Country_Region, Date, sep = ".", remove = FALSE)

Global_Time_Series_Recovered_Long <- Global_Time_Series_Recovered_Long %>%
  unite(Key, Province_State, Country_Region, Date, sep = ".", remove = FALSE)

# Carry the identifier columns along with the value. If only Key and the value
# were kept, a row present in the deaths or recovered table but absent from the
# confirmed table would come out of the full join with NA Province_State,
# Country_Region and Date once Key is dropped, making it unidentifiable.
# Lat/Long are still taken from the confirmed table only.
Global_Time_Series_Deaths_Long2 <- Global_Time_Series_Deaths_Long %>%
  select(Key, Province_State, Country_Region, Date, Deaths)

Global_Time_Series_Recovered_Long2 <- Global_Time_Series_Recovered_Long %>%
  select(Key, Province_State, Country_Region, Date, Recovered)

# Key is built from the other three join columns, so joining on all four
# matches exactly the same row pairs as joining on Key alone.
Global_Time_Series_Concat_Temp <- full_join(
  Global_Time_Series_Confirmed_Long,
  Global_Time_Series_Deaths_Long2,
  by = c("Key", "Province_State", "Country_Region", "Date")
)

Global_Time_Series_Concat <- full_join(
  Global_Time_Series_Concat_Temp,
  Global_Time_Series_Recovered_Long2,
  by = c("Key", "Province_State", "Country_Region", "Date")
) %>%
  select(-Key)

Global_Time_Series_Concat
## # A tibble: 29,370 x 8
##    Province_State Country_Region   Lat  Long Date    Confirmed Deaths Recovered
##    <chr>          <chr>          <dbl> <dbl> <chr>       <dbl>  <dbl>     <dbl>
##  1 <NA>           Afghanistan       33    65 1/22/20         0      0         0
##  2 <NA>           Afghanistan       33    65 1/23/20         0      0         0
##  3 <NA>           Afghanistan       33    65 1/24/20         0      0         0
##  4 <NA>           Afghanistan       33    65 1/25/20         0      0         0
##  5 <NA>           Afghanistan       33    65 1/26/20         0      0         0
##  6 <NA>           Afghanistan       33    65 1/27/20         0      0         0
##  7 <NA>           Afghanistan       33    65 1/28/20         0      0         0
##  8 <NA>           Afghanistan       33    65 1/29/20         0      0         0
##  9 <NA>           Afghanistan       33    65 1/30/20         0      0         0
## 10 <NA>           Afghanistan       33    65 1/31/20         0      0         0
## # … with 29,360 more rows
# Convert the month/day/year strings in Date into Date values.
Global_Time_Series_Concat <- Global_Time_Series_Concat %>%
  mutate(Date = mdy(Date))

Global_Time_Series_Concat
## # A tibble: 29,370 x 8
##    Province_State Country_Region   Lat  Long Date       Confirmed Deaths
##    <chr>          <chr>          <dbl> <dbl> <date>         <dbl>  <dbl>
##  1 <NA>           Afghanistan       33    65 2020-01-22         0      0
##  2 <NA>           Afghanistan       33    65 2020-01-23         0      0
##  3 <NA>           Afghanistan       33    65 2020-01-24         0      0
##  4 <NA>           Afghanistan       33    65 2020-01-25         0      0
##  5 <NA>           Afghanistan       33    65 2020-01-26         0      0
##  6 <NA>           Afghanistan       33    65 2020-01-27         0      0
##  7 <NA>           Afghanistan       33    65 2020-01-28         0      0
##  8 <NA>           Afghanistan       33    65 2020-01-29         0      0
##  9 <NA>           Afghanistan       33    65 2020-01-30         0      0
## 10 <NA>           Afghanistan       33    65 2020-01-31         0      0
## # … with 29,360 more rows, and 1 more variable: Recovered <dbl>
# Stack the measure columns (everything except the identifiers and Date) into
# a single Report_Type/Counts pair: one row per location, date and measure.
Global_Time_Series_Concat_Counts <- pivot_longer(
  Global_Time_Series_Concat,
  cols = -c(Province_State, Country_Region, Lat, Long, Date),
  names_to = "Report_Type",
  values_to = "Counts"
)

Global_Time_Series_Concat_Counts
## # A tibble: 88,110 x 7
##    Province_State Country_Region   Lat  Long Date       Report_Type Counts
##    <chr>          <chr>          <dbl> <dbl> <date>     <chr>        <dbl>
##  1 <NA>           Afghanistan       33    65 2020-01-22 Confirmed        0
##  2 <NA>           Afghanistan       33    65 2020-01-22 Deaths           0
##  3 <NA>           Afghanistan       33    65 2020-01-22 Recovered        0
##  4 <NA>           Afghanistan       33    65 2020-01-23 Confirmed        0
##  5 <NA>           Afghanistan       33    65 2020-01-23 Deaths           0
##  6 <NA>           Afghanistan       33    65 2020-01-23 Recovered        0
##  7 <NA>           Afghanistan       33    65 2020-01-24 Confirmed        0
##  8 <NA>           Afghanistan       33    65 2020-01-24 Deaths           0
##  9 <NA>           Afghanistan       33    65 2020-01-24 Recovered        0
## 10 <NA>           Afghanistan       33    65 2020-01-25 Confirmed        0
## # … with 88,100 more rows
# Time series of confirmed cases for seven northeastern US states. The rows
# are first summed to one total per state and date.
fig3 <- US_Time_Series_Concat %>%
  group_by(Province_State, Date) %>%
  summarise_at(vars(Confirmed, Deaths), sum) %>%
  filter(
    Province_State %in% c(
      "Connecticut", "Maine", "New Hampshire", "Vermont",
      "New York", "Rhode Island", "Massachusetts"
    )
  ) %>%
  ggplot(mapping = aes(x = Date, y = Confirmed, colour = Province_State)) +
  geom_point() +
  geom_line() +
  ggtitle("COVID-19 Cases in the US Northeast")

# Render the static ggplot as an interactive plotly widget.
ggplotly(fig3)
# Confirmed, death and recovered counts over time for six North African
# countries, one facet per country.
# No grouping is needed here: nothing is summarised, and the filter condition
# is a plain row-wise membership test.
fig4 <- Global_Time_Series_Concat_Counts %>%
  filter(
    Country_Region %in%
      c("Morocco", "Algeria", "Tunisia", "Libya", "Egypt", "Sudan")
  ) %>%
  ggplot(aes(x = Date, y = Counts, color = Report_Type)) +
    geom_point() +
    geom_line() +
    ggtitle("COVID-19 Cases in Northern Africa") +
    # Free y scales let each country use its own count range.
    facet_wrap(~Country_Region, ncol = 2, scales = "free_y")

# Render the static ggplot as an interactive plotly widget.
ggplotly(fig4)
# Bubble map of the 100 US rows with the most confirmed cases in the 3-31-20
# daily report, drawn over state borders.
fig5 <- report_3_31_2020 %>%
  filter(
    Country_Region == "US",
    # Drop entries that are excluded from this map.
    # NOTE(review): "District of Columbia" is excluded as well -- confirm that
    # this is intended.
    !Province_State %in% c(
      "Alaska", "Hawaii", "American Samoa",
      "Puerto Rico", "Northern Mariana Islands",
      "Virgin Islands", "Recovered", "Guam", "Grand Princess",
      "District of Columbia", "Diamond Princess"
    ),
    # Keep only rows with a positive latitude (this also drops rows whose
    # latitude is missing).
    Lat > 0
  ) %>%
  arrange(desc(Confirmed)) %>%
  dplyr::slice(1:100) %>%
  ggplot(aes(x = Long_, y = Lat, size = Confirmed)) +
    borders("state", colour = "black", fill = "grey") +
    theme_bw() +
    geom_point(shape = 21, color = "red", fill = "red", alpha = 0.5) +
    labs(
      title = "100 Regions with the Most US COVID-19 Confirmed cases as of 3-31-20",
      x = "", y = "",
      # Legend title for the point-size scale (stray ")" removed).
      size = "Cases"
    ) +
    theme(legend.position = "right") +
    coord_fixed(ratio = 1.5)

# Render the static ggplot as an interactive plotly widget.
ggplotly(fig5)
# Keep only snapshot dates: every 14th day from the earliest date in the data
# up to 29 April 2020. na.rm = TRUE ignores rows whose Date is missing.
temp <- Global_Time_Series_Concat %>%
  filter(
    Date %in% seq.Date(
      from = min(Date, na.rm = TRUE),
      to = as.Date("2020/04/29"),
      by = 14
    )
  )

# World bubble maps of confirmed cases, one facet per snapshot date.
fig6 <- temp %>%
  ggplot(mapping = aes(x = Long, y = Lat, size = Confirmed)) +
  borders("world", colour = "black", fill = "grey") +
  theme_bw() +
  geom_point(shape = 21, color = "red", fill = "red", alpha = 0.5) +
  labs(
    title = "Global Growth of the Confirmed Virus Cases",
    x = "Longitude",
    y = "Latitude",
    size = "Cases"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5) +
  facet_wrap(. ~ Date, ncol = 2)

fig6